* Settings
* Interpret the commands below under Stata 16 syntax rules.
version 16
* The SSDIMed project root is not set in this file (the line below is commented out), so the
* global must already be defined by the time the project settings script is called.
* global SSDIMed "/disk/agedisk4/medicare.work/miller-DUA50377/proj_ssdi"
do "$SSDIMed/scripts/_auxiliary/_project_settings.do"

* Force redo of estimation?
*   redo_estimates = 0: use existing estimates, if available
*   redo_estimates = 1: redo estimation, even if existing estimates are available
* Read by the existence checks further down, which skip a spec only when the results file
* is found and this flag is 0.
local redo_estimates 0

* Sample definitions:
* Each sample_NAME local holds an expression that is pasted into an if qualifier (or into
* keep / assert) later in the file; NAME is selected through the local called sample.
* - main (all ages, all years)
local sample_main 1
* - ages 22-62
local sample_2262 inrange(age_year_covstart_fill, 22, 62)
* - ages 51-52
local sample_5152 inrange(age_year_covstart_fill, 51, 52)
* - first full year (reference year equals init_yr plus one; not used in the visible
*   remainder of this file)
local sample_ffy (rfrnc_yr == init_yr + 1)


* -------------------------------------------------------------------------------------------------
* Part 1. Relate DI entrant outcomes to initial conditions at time of entry
* -------------------------------------------------------------------------------------------------

* DI entrant outcomes, by x, where x = 
*   i.covstart_year              = calendar year of Medicare entry
*   c.unemp_rate_county_atapp    = unemployment rate at time of application
*   i.unemp_rate_q20_atapp       = ventiles of unemp_rate_county_atapp
*   i.age_year_covstart_fill     = age in years at Medicare entry


* -------------------------------------------------------------------------------------------------
* Load the beneficiary-year outcomes file and verify it looks as expected
use "$SSDIMed/data/analysis/bene-year_outcomes_sample-main.dta", clear
assert dib_sample == 1

* Age at entry must span exactly 20 through 62 in this file
quietly summarize age_year_covstart_fill
local age_lo = r(min)
local age_hi = r(max)
assert `age_lo' == 20 & `age_hi' == 62

* Additional outcomes: 0/1 indicators for any positive count in the source variable.
* The source variables must be fully populated, otherwise missing values would count as positive.
assert !missing(buyin_mo, hmo_mo, b_mo_cnt)
local src_dual buyin_mo
local src_hmo hmo_mo
local src_partb b_mo_cnt
foreach flag in dual hmo partb {
  gen `flag' = (`src_`flag'' > 0)
}

* Absorbed control sets, one per specification
* + Spec 1:   years enrolled
* + Spec 2:   years enrolled by county (baseline)
* + Spec 3-4: years enrolled by county, sex by age (demog diffs in who enrolls when)
*             (spec 3 uses age_year_covstart_fill, spec 4 uses age)
* + Spec 5:   years enrolled by county, rfrnc_yr by county
* + Spec 6:   years enrolled by entry month
* + Spec 7:   years enrolled by county, years enrolled by entry month
* The shared fragments below are only building blocks; the controls_NN locals expand to the
* full text that is passed to abs() and recorded in the saved results.
local fe_enrolled i.years_since_covstart
local fe_by_county `fe_enrolled'##i.fipscounty_firstnm_g
local fe_by_month `fe_enrolled'##i.covstart_month

local controls_01 `fe_enrolled'
local controls_02 `fe_by_county'
local controls_03 `fe_by_county' i.male_init##i.age_year_covstart_fill
local controls_04 `fe_by_county' i.male_init##i.age
local controls_05 `fe_by_county' i.rfrnc_yr##i.fipscounty_firstnm_g
local controls_06 `fe_by_month'
local controls_07 `fe_by_county' `fe_by_month'

* Estimation settings
* Variable used for clustered standard errors
local clust county_mofd

* Sample used for Part 1 (a loop over main and 2262 is currently disabled)
* foreach sample in main 2262
local sample 2262

* Run every admissible combination of outcome (y), main regressor (x) and control set (ctrl),
* saving one .ster and one .dta per combination under results/estimates.
foreach y in tot_pmt died_adj dual hmo partb male_init age_year_covstart_fill {
foreach x in i.covstart_year c.unemp_rate_county_atapp i.unemp_rate_q20_atapp i.age_year_covstart_fill {
foreach ctrl in 01 02 03 04 05 06 07 {
  * Flags describing what the current control set contains
  local absorb_vars `controls_`ctrl''
  local ctrl_has_age = strpos("`absorb_vars'", "i.age") > 0
  local ctrl_has_month = strpos("`absorb_vars'", "i.covstart_month") > 0
  local ctrl_has_rfrnc = strpos("`absorb_vars'", "i.rfrnc_yr") > 0

  * Demographic outcomes (age at entry, sex) are only related to entry year, and never
  * with control sets that include an age term
  local y_is_demog = inlist("`y'", "age_year_covstart_fill", "male_init")
  if `y_is_demog' & ("`x'" != "i.covstart_year" | `ctrl_has_age') continue

  * Skip specs where the controls would absorb x (main indep variable)
  if "`x'" == "i.age_year_covstart_fill" & `ctrl_has_age' continue
  if "`x'" == "i.covstart_year" & `ctrl_has_month' continue

  * Control sets containing rfrnc_yr (spec 5) are only run for y = tot_pmt or died_adj
  * combined with x = i.covstart_year or c.unemp_rate_county_atapp
  local headline_pair = inlist("`y'", "tot_pmt", "died_adj") & inlist("`x'", "i.covstart_year", "c.unemp_rate_county_atapp")
  if `ctrl_has_rfrnc' & !`headline_pair' continue

  * Reuse saved results when they exist and a redo has not been requested
  local results_stub $SSDIMed/results/estimates/x-`x'/x-`x'_y-`y'_controls-`ctrl'_cluster-`clust'_sample-`sample'
  capture confirm file "`results_stub'.dta"
  local reuse_saved = (max(_rc, `redo_estimates') == 0)
  if `reuse_saved' {
    di _newline "Using existing estimation results in `results_stub'.dta. Set redo_estimates=1 to recalculate results."
    continue
  }

  * Estimation: echo the command, then run it
  local reg_cmd reghdfe `y' `x' if `sample_`sample'', abs(`controls_`ctrl'') cluster(`clust')
  noisily di "`reg_cmd'"
  `reg_cmd'

  * Additional estimation stats, both restricted to the estimation sample
  *   Dep var mean
  sum `y' if e(sample)
  local y_mean_esample = r(mean)
  *   Number of unique beneficiaries
  qui glevelsof bene_id if e(sample), nolocal
  local N_benes_esample = r(J)

  * Save results: create the per-x folder if needed, then write the .ster and the .dta
  cap mkdir "$SSDIMed/results/estimates/x-`x'"
  estimates save "`results_stub'.ster", replace
  regsave using "`results_stub'.dta", cmdline p ci detail(all) ///
    addlabel(y, `y', y_mean_esample, `y_mean_esample', x, `x', controls, `controls_`ctrl'', sample, `sample', cluster, `clust', N_benes_esample, `N_benes_esample') replace
  * use "`results_stub'.dta", clear
  * list var coef stderr pval ci_lower ci_upper N y y_mean_esample x controls sample cluster, sep(0)
}
}
}


* -------------------------------------------------------------------------------------------------
* PART 2: Outcomes by unemployment interacted with 51 vs. 52 years old at entry
use "$SSDIMed/data/analysis/bene-year_outcomes_sample-main.dta", clear

* Age at entry must span exactly 20 through 62 before restricting the sample
quietly summarize age_year_covstart_fill
local age_lo = r(min)
local age_hi = r(max)
assert `age_lo' == 20 & `age_hi' == 62

* Restrict to entrants aged 51 or 52; the assert guards against the sample definition
* drifting away from that range
local sample 5152
assert `sample_5152' == inrange(age_year_covstart_fill, 51, 52)
keep if `sample_5152'

* Regressors: age-52 indicator, unemployment rate centered on its (unweighted) mean within
* the kept sample, and their product
gen age52 = (age_year_covstart_fill == 52)
sum unemp_rate_county_atapp
local ur_mean `r(mean)'
gen UR = unemp_rate_county_atapp - `ur_mean'
gen age52xUR = UR * age52

* Control definitions same as for all-age specification (the controls_01 to controls_07
* locals must still be in scope from Part 1)

* Estimation
local clust county_mofd

foreach y in tot_pmt died_adj {
foreach ctrl in 01 02 03 04 05 06 07 {
  noisily di "Working on y = `y' and ctrl = `ctrl'"

  * Reuse saved results when they exist and a redo has not been requested
  local results_stub $SSDIMed/results/estimates/x-age52xUR/x-age52xUR_y-`y'_controls-`ctrl'_cluster-`clust'_sample-`sample'
  capture confirm file "`results_stub'.dta"
  local reuse_saved = (max(_rc, `redo_estimates') == 0)
  if `reuse_saved' {
    di _newline "Using existing estimation results in `results_stub'.dta. Set redo_estimates=1 to recalculate results."
    continue
  }

  * Estimation, followed by the dep var mean over the estimation sample
  reghdfe `y' age52 UR age52xUR, abs(`controls_`ctrl'') cluster(`clust')
  sum `y' if e(sample)
  local y_mean_esample = r(mean)

  * Save results: create the folder if needed, then write the .ster and the .dta
  cap mkdir "$SSDIMed/results/estimates/x-age52xUR"
  estimates save "`results_stub'.ster", replace
  regsave using "`results_stub'.dta", cmdline p ci detail(all) ///
    addlabel(y, `y', y_mean_esample, `y_mean_esample', x, "age52 UR age52xUR", controls, `controls_`ctrl'', sample, `sample', cluster, `clust') replace
}
}


* -------------------------------------------------------------------------------------------------
* Part 3. Relate DI entry to initial conditions at time of entry
* -------------------------------------------------------------------------------------------------

* -------------------------------------------------------------------------------------------------
* Incidence: average across all ages
use "$SSDIMed/data/analysis/county-month-age_entry_sample-main.dta", clear

* Age at entry must span exactly 20 through 62 in this file
quietly summarize age_year_covstart_fill
local age_lo = r(min)
local age_hi = r(max)
assert `age_lo' == 20 & `age_hi' == 62

* Control sets for the incidence regressions (these overwrite controls_02 and controls_03
* as defined for Part 1)
local county_fe i.fipscounty_firstnm_g
local controls_02 `county_fe'
local controls_03 `county_fe'##i.age_year_covstart_fill

* Estimation settings: outcome, analytic weight, cluster variable and sample
local sample 2262
local y incidence_pop_age_atapp
local w pop_age_atapp
local clust county_mofd

foreach x in i.covstart_year c.unemp_rate_county_atapp i.unemp_rate_q20_atapp {
foreach ctrl in 02 03 {

  * Reuse saved results when they exist and a redo has not been requested
  local results_stub $SSDIMed/results/estimates/x-`x'/x-`x'_y-`y'_controls-`ctrl'_cluster-`clust'_sample-`sample'
  capture confirm file "`results_stub'.dta"
  local reuse_saved = (max(_rc, `redo_estimates') == 0)
  if `reuse_saved' {
    di _newline "Using existing estimation results in `results_stub'.dta. Set redo_estimates=1 to recalculate results."
    continue
  }

  * Estimation: plain weighted regression when the control set is the literal word none,
  * otherwise absorb the controls with reghdfe
  if "`controls_`ctrl''" == "none" {
    regress `y' `x' [aw = `w'] if `sample_`sample'', cluster(`clust')
  }
  else {
    reghdfe `y' `x' [aw = `w'] if `sample_`sample'', abs(`controls_`ctrl'') cluster(`clust')
  }

  * Weighted dep var mean over the estimation sample
  sum `y' if e(sample) [aw = `w']
  local y_mean_esample = r(mean)

  * Save results: create the per-x folder if needed, then write the .ster and the .dta
  cap mkdir "$SSDIMed/results/estimates/x-`x'"
  estimates save "`results_stub'.ster", replace
  regsave using "`results_stub'.dta", cmdline p ci detail(all) ///
    addlabel(y, `y', y_mean_esample, `y_mean_esample', x, `x', controls, `controls_`ctrl'', sample, `sample', cluster, `clust') replace
  * use "`results_stub'.dta", clear
  * list var coef stderr pval ci_lower ci_upper N y y_mean_esample x controls sample cluster, sep(0)
}
}


* -------------------------------------------------------------------------------------------------
* Part 4: Incidence by unemployment interacted with 51 vs. 52 years old at entry
use "$SSDIMed/data/analysis/county-month-age_entry_sample-main.dta", clear

* Age at entry must span exactly 20 through 62 before restricting the sample
quietly summarize age_year_covstart_fill
local age_lo = r(min)
local age_hi = r(max)
assert `age_lo' == 20 & `age_hi' == 62

* Restrict to ages 51 and 52; the assert guards against the sample definition drifting
* away from that range
local sample 5152
assert `sample_5152' == inrange(age_year_covstart_fill, 51, 52)
keep if `sample_5152'

* Regressors: age-52 indicator, unemployment rate centered on its weighted mean within the
* kept sample, and their product
* NOTE(review): the centering mean is weighted by pop_19_61_atapp while the regressions
* below are weighted by pop_age_atapp - confirm this difference is intended
gen age52 = (age_year_covstart_fill == 52)
sum unemp_rate_county_atapp [aw = pop_19_61_atapp]
local ur_mean `r(mean)'
gen UR = unemp_rate_county_atapp - `ur_mean'
gen age52xUR = UR * age52

* Control sets (only 02 is estimated below; 01 is defined but not looped over)
local controls_01 none
local controls_02 i.fipscounty_firstnm_g

* Estimation settings: outcome, analytic weight and cluster variable
local y incidence_pop_age_atapp
local w pop_age_atapp
local clust county_mofd

foreach ctrl in 02 {
  * Reuse saved results when they exist and a redo has not been requested
  local results_stub $SSDIMed/results/estimates/x-age52xUR/x-age52xUR_y-`y'_controls-`ctrl'_cluster-`clust'_sample-`sample'
  capture confirm file "`results_stub'.dta"
  local reuse_saved = (max(_rc, `redo_estimates') == 0)
  if `reuse_saved' {
    di _newline "Using existing estimation results in `results_stub'.dta. Set redo_estimates=1 to recalculate results."
    continue
  }

  * Estimation: plain weighted regression when the control set is the literal word none,
  * otherwise absorb the controls with reghdfe
  if "`controls_`ctrl''" == "none" {
    regress `y' age52 UR age52xUR [aw = `w'], cluster(`clust')
  }
  else {
    reghdfe `y' age52 UR age52xUR [aw = `w'], abs(`controls_`ctrl'') cluster(`clust')
  }

  * Weighted dep var mean over the estimation sample
  sum `y' if e(sample) [aw = `w']
  local y_mean_esample = r(mean)

  * Save results: create the folder if needed, then write the .ster and the .dta
  cap mkdir "$SSDIMed/results/estimates/x-age52xUR"
  estimates save "`results_stub'.ster", replace
  regsave using "`results_stub'.dta", cmdline p ci detail(all) ///
    addlabel(y, `y', y_mean_esample, `y_mean_esample', x, "age52 UR age52xUR", controls, `controls_`ctrl'', sample, `sample', cluster, `clust') replace
}


* -------------------------------------------------------------------------------------------------
* Part 5: Incidence: by ages (all ages)
* Estimates one unemployment-rate slope per age at entry (x interacted with age) for each
* control set, and saves one .ster and one .dta per control set.
use "$SSDIMed/data/analysis/county-month-age_entry_sample-main.dta", clear
qui sum age_year_covstart_fill
assert r(min)==20 & r(max)==62

* Which controls to include in each regression (these overwrite earlier controls_01-03)
local controls_01 i.age_year_covstart_fill
local controls_02 i.fipscounty_firstnm_g##i.age_year_covstart_fill
local controls_03 i.covstart_month##i.age_year_covstart_fill

* Estimation settings: sample, outcome, main regressor, analytic weight, cluster variable
local sample 2262
local y incidence_pop_age_atapp
local x c.unemp_rate_county_atapp
local w pop_age_atapp
local clust county_mofd

qui foreach ctrl in 01 02 03 {
  * local ctrl 02
  
  * Check if estimation results already exist, do not recompute unless needed
  * (same rule as the other parts: skip only when the file exists and redo_estimates is 0)
  local results_stub $SSDIMed/results/estimates/x-`x'/x-`x'-age_year_covstart_fill_y-`y'_controls-`ctrl'_cluster-`clust'_sample-`sample'
  capture confirm file "`results_stub'.dta"
  if max(_rc, `redo_estimates') == 0 {
    * noisily is needed because the whole loop runs under quietly
    noisily di _newline "Using existing estimation results in `results_stub'.dta. Set redo_estimates=1 to recalculate results."
    continue
  }
  
  * Estimation
  * The local x already carries the c. operator, so it is interacted with age as is;
  * prefixing it again would produce a doubled c.c. operator.
  reghdfe `y' `x'#i.age_year_covstart_fill [aw = `w'] if `sample_`sample'', abs(`controls_`ctrl'') cluster(`clust')
  
  * Weighted dep var mean over the estimation sample
  sum `y' if e(sample) [aw = `w']
  local y_mean_esample = r(mean)

  * Save results: create the per-x folder if needed, then write the .ster and the .dta
  cap mkdir "$SSDIMed/results/estimates/x-`x'"
  estimates save "`results_stub'.ster", replace
  regsave using  "`results_stub'.dta", cmdline p ci detail(all) addlabel(y, `y', y_mean_esample, `y_mean_esample', x, `x', controls, `controls_`ctrl'', sample, `sample', cluster, `clust') replace
}




** EOF
